/*- * See the file LICENSE for redistribution information. * * Copyright (c) 2002-2006 * Sleepycat Software. All rights reserved. * * $Id: Evictor.java,v 1.1 2006/05/06 09:02:10 ckaestne Exp $ */ package com.sleepycat.je.evictor; import java.text.NumberFormat; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; import com.sleepycat.je.DatabaseException; import com.sleepycat.je.EnvironmentStats; import com.sleepycat.je.StatsConfig; import com.sleepycat.je.config.EnvironmentParams; import com.sleepycat.je.dbi.DatabaseImpl; import com.sleepycat.je.dbi.DbConfigManager; import com.sleepycat.je.dbi.DbTree; import com.sleepycat.je.dbi.EnvironmentImpl; import com.sleepycat.je.dbi.INList; import com.sleepycat.je.dbi.MemoryBudget; import com.sleepycat.je.latch.LatchSupport; import com.sleepycat.je.log.LogManager; import com.sleepycat.je.tree.BIN; import com.sleepycat.je.tree.IN; import com.sleepycat.je.tree.Node; import com.sleepycat.je.tree.SearchResult; import com.sleepycat.je.tree.Tree; import com.sleepycat.je.utilint.DaemonThread; import com.sleepycat.je.utilint.DbLsn; import com.sleepycat.je.utilint.TestHook; import com.sleepycat.je.utilint.Tracer; /** * The Evictor looks through the INList for IN's and BIN's that are worthy of * eviction. Once the nodes are selected, it removes all references to them so * that they can be GC'd by the JVM. */ public class Evictor extends DaemonThread { public static final String SOURCE_DAEMON = "daemon"; public static final String SOURCE_MANUAL = "manual"; public static final String SOURCE_CRITICAL = "critical"; private static final boolean DEBUG = false; private EnvironmentImpl envImpl; private LogManager logManager; private Level detailedTraceLevel; // level value for detailed trace msgs private volatile boolean active; // true if eviction is happening. /* Round robin marker in the INList, indicates start of eviction scans. */ private IN nextNode; /* The number of bytes we need to evict in order to get under budget. */ private long currentRequiredEvictBytes; /* 1 node out of <nodesPerScan> are chosen for eviction. */ private int nodesPerScan; /* je.evictor.evictBytes */ private long evictBytesSetting; /* je.evictor.lruOnly */ private boolean evictByLruOnly; /* for trace messages. */ private NumberFormat formatter; /* * Stats */ /* Number of passes made to the evictor. */ private int nEvictPasses = 0; /* Number of nodes selected to evict. */ private long nNodesSelected = 0; private long nNodesSelectedThisRun; /* Number of nodes scanned in order to select the eviction set */ private int nNodesScanned = 0; private int nNodesScannedThisRun; /* * Number of nodes evicted on this run. This could be understated, as a * whole subtree may have gone out with a single node. */ private long nNodesEvicted = 0; private long nNodesEvictedThisRun; /* Number of BINs stripped. */ private long nBINsStripped = 0; private long nBINsStrippedThisRun; /* Debugging and unit test support. */ EvictProfile evictProfile; private TestHook runnableHook; public Evictor(EnvironmentImpl envImpl, String name) throws DatabaseException { super(0, name, envImpl); this.envImpl = envImpl; logManager = envImpl.getLogManager(); nextNode = null; DbConfigManager configManager = envImpl.getConfigManager(); nodesPerScan = configManager.getInt (EnvironmentParams.EVICTOR_NODES_PER_SCAN); evictBytesSetting = configManager.getLong (EnvironmentParams.EVICTOR_EVICT_BYTES); evictByLruOnly = configManager.getBoolean (EnvironmentParams.EVICTOR_LRU_ONLY); detailedTraceLevel = Tracer.parseLevel (envImpl, EnvironmentParams.JE_LOGGING_LEVEL_EVICTOR); evictProfile = new EvictProfile(); formatter = NumberFormat.getNumberInstance(); active = false; } public String toString() { StringBuffer sb = new StringBuffer(); sb.append("<Evictor name=\"").append(name).append("\"/>"); return sb.toString(); } /** * Evictor doesn't have a work queue so just throw an exception if it's * ever called. */ public void addToQueue(Object o) throws DatabaseException { throw new DatabaseException ("Evictor.addToQueue should never be called."); } /** * Load stats. */ public void loadStats(StatsConfig config, EnvironmentStats stat) throws DatabaseException { stat.setNEvictPasses(nEvictPasses); stat.setNNodesSelected(nNodesSelected); stat.setNNodesScanned(nNodesScanned); stat.setNNodesExplicitlyEvicted(nNodesEvicted); stat.setNBINsStripped(nBINsStripped); stat.setRequiredEvictBytes(currentRequiredEvictBytes); if (config.getClear()) { nEvictPasses = 0; nNodesSelected = 0; nNodesScanned = 0; nNodesEvicted = 0; nBINsStripped = 0; } } synchronized public void clearEnv() { envImpl = null; } /** * Return the number of retries when a deadlock exception occurs. */ protected int nDeadlockRetries() throws DatabaseException { return envImpl.getConfigManager().getInt (EnvironmentParams.EVICTOR_RETRY); } /** * Wakeup the evictor only if it's not already active. */ public void alert() { if (!active) { wakeup(); } } /** * Called whenever the daemon thread wakes up from a sleep. */ public void onWakeup() throws DatabaseException { if (envImpl.isClosed()) { return; } doEvict(SOURCE_DAEMON, false); } /** * May be called by the evictor thread on wakeup or programatically. */ public void doEvict(String source) throws DatabaseException { doEvict(source, false /*evictDuringShutdown*/); } /** * Allows performing eviction during shutdown, which is needed when * during checkpointing and cleaner log file deletion. */ private synchronized void doEvict(String source, boolean evictDuringShutdown) throws DatabaseException { /* * We use an active flag to prevent reentrant calls. This is simpler * than ensuring that no reentrant eviction can occur in any caller. * We also use the active flag to determine when it is unnecessary to * wake up the evictor thread. */ if (active) { return; } active = true; try { /* * Repeat as necessary to keep up with allocations. Stop if no * progress is made, to prevent an infinite loop. */ boolean progress = true; while (progress && (evictDuringShutdown || !isShutdownRequested()) && isRunnable(source)) { if (evictBatch(source, currentRequiredEvictBytes) == 0) { progress = false; } } } finally { active = false; } } /** * Do a check on whether synchronous eviction is needed. */ public void doCriticalEviction() throws DatabaseException { MemoryBudget mb = envImpl.getMemoryBudget(); long currentUsage = mb.getCacheMemoryUsage(); long maxMem = mb.getCacheBudget(); long over = currentUsage - maxMem; if (over > mb.getCriticalThreshold()) { if (DEBUG) { System.out.println("***critical detected:" + over); } doEvict(SOURCE_CRITICAL, true /*evictDuringShutdown*/); } } /** * Each iteration will latch and unlatch the major INList, and will attempt * to evict requiredEvictBytes, but will give up after a complete pass * over the major INList. Releasing the latch is important because it * provides an opportunity for to add the minor INList to the major INList. * * @return the number of bytes evicted, or zero if no progress was made. */ long evictBatch(String source, long requiredEvictBytes) throws DatabaseException { nNodesSelectedThisRun = 0; nNodesEvictedThisRun = 0; nNodesScannedThisRun = 0; nBINsStrippedThisRun = 0; nEvictPasses++; assert evictProfile.clear(); // intentional side effect int nBatchSets = 0; boolean finished = false; long evictBytes = 0; /* Evict utilization tracking info without holding the INList latch. */ evictBytes += envImpl.getUtilizationTracker().evictMemory(); INList inList = envImpl.getInMemoryINs(); inList.latchMajor(); int inListStartSize = inList.getSize(); try { /* * Setup the round robin iterator. Note that because critical * eviction is now called during recovery, when the INList is * sometimes abruptly cleared, nextNode may not be null when the * INList is empty. */ if (inListStartSize == 0) { nextNode = null; return 0; } else { if (nextNode == null) { nextNode = inList.first(); } } ScanIterator scanIter = new ScanIterator(nextNode, inList); /* * Keep evicting until we've freed enough memory or we've visited * the maximum number of nodes allowed. Each iteration of the while * loop is called an eviction batch. * * In order to prevent endless evicting and not keep the INList * major latch for too long, limit this run to one pass over the IN * list. */ while ((evictBytes < requiredEvictBytes) && (nNodesScannedThisRun <= inListStartSize)) { IN target = selectIN(inList, scanIter); if (target == null) { break; } else { assert evictProfile.count(target);//intentional side effect evictBytes += evict(inList, target, scanIter); } nBatchSets++; } /* * At the end of the scan, look at the next element in the INList * and put it in nextNode for the next time we scan the INList. */ nextNode = scanIter.mark(); finished = true; } finally { nNodesScanned += nNodesScannedThisRun; inList.releaseMajorLatch(); Logger logger = envImpl.getLogger(); if (logger.isLoggable(detailedTraceLevel)) { /* Ugh, only create trace message when logging. */ Tracer.trace(detailedTraceLevel, envImpl, "Evictor: "+"pass=" + nEvictPasses + " finished=" + finished + " source=" + source + " requiredEvictBytes=" + formatter.format(requiredEvictBytes) + " evictBytes=" + formatter.format(evictBytes) + " inListSize=" + inListStartSize + " nNodesScanned=" + nNodesScannedThisRun + " nNodesSelected=" + nNodesSelectedThisRun + " nEvicted=" + nNodesEvictedThisRun + " nBINsStripped=" + nBINsStrippedThisRun + " nBatchSets=" + nBatchSets); } } assert LatchSupport.countLatchesHeld() == 0: "latches held = " + LatchSupport.countLatchesHeld(); return evictBytes; } /** * Return true if eviction should happen. */ boolean isRunnable(String source) throws DatabaseException { MemoryBudget mb = envImpl.getMemoryBudget(); long currentUsage = mb.getCacheMemoryUsage(); long maxMem = mb.getCacheBudget(); boolean doRun = ((currentUsage - maxMem) > 0); /* If running, figure out how much to evict. */ if (doRun) { currentRequiredEvictBytes = (currentUsage - maxMem) + evictBytesSetting; if (DEBUG) { if (source == SOURCE_CRITICAL) { System.out.println("executed: critical runnable"); } } } /* unit testing, force eviction */ if (runnableHook != null) { doRun = ((Boolean) runnableHook.getHookValue()).booleanValue(); currentRequiredEvictBytes = maxMem; } /* * This trace message is expensive, only generate if tracing at this * level is enabled. */ Logger logger = envImpl.getLogger(); if (logger.isLoggable(detailedTraceLevel)) { /* * Generate debugging output. Note that Runtime.freeMemory * fluctuates over time as the JVM grabs more memory, so you really * have to do totalMemory - freeMemory to get stack usage. (You * can't get the concept of memory available from free memory.) */ Runtime r = Runtime.getRuntime(); long totalBytes = r.totalMemory(); long freeBytes= r.freeMemory(); long usedBytes = r.totalMemory() - r.freeMemory(); StringBuffer sb = new StringBuffer(); sb.append(" source=").append(source); sb.append(" doRun=").append(doRun); sb.append(" JEusedBytes=").append(formatter.format(currentUsage)); sb.append(" requiredEvict="). append(formatter.format(currentRequiredEvictBytes)); sb.append(" JVMtotalBytes= ").append(formatter.format(totalBytes)); sb.append(" JVMfreeBytes= ").append(formatter.format(freeBytes)); sb.append(" JVMusedBytes= ").append(formatter.format(usedBytes)); logger.log(detailedTraceLevel, sb.toString()); } return doRun; } /** * Select a single node to evict. */ private IN selectIN(INList inList, ScanIterator scanIter) throws DatabaseException { /* Find the best target in the next <nodesPerScan> nodes. */ IN target = null; long targetGeneration = Long.MAX_VALUE; int targetLevel = Integer.MAX_VALUE; boolean targetDirty = true; boolean envIsReadOnly = envImpl.isReadOnly(); int scanned = 0; boolean wrapped = false; while (scanned < nodesPerScan) { if (scanIter.hasNext()) { IN in = scanIter.next(); nNodesScannedThisRun++; DatabaseImpl db = in.getDatabase(); /* * We don't expect to see an IN with a database that has * finished delete processing, because it would have been * removed from the inlist during post-delete cleanup. */ if (db == null || db.isDeleteFinished()) { String inInfo = " IN type=" + in.getLogType() + " id=" + in.getNodeId() + " not expected on INList"; String errMsg = (db == null) ? inInfo : "Database " + db.getDebugName() + " id=" + db.getId() + inInfo; throw new DatabaseException(errMsg); } /* Ignore if the db is in the middle of delete processing. */ if (db.isDeleted()) { continue; } /* * Don't evict the DatabaseImpl Id Mapping Tree (db 0), both * for object identity reasons and because the id mapping tree * should stay cached. */ if (db.getId().equals(DbTree.ID_DB_ID)) { continue; } /* * If this is a read only database and we have at least one * target, skip any dirty INs (recovery dirties INs even in a * read-only environment). We take at least one target so we * don't loop endlessly if everything is dirty. */ if (envIsReadOnly && (target != null) && in.getDirty()) { continue; } /* * Only scan evictable or strippable INs. This prevents higher * level INs from being selected for eviction, unless they are * part of an unused tree. */ int evictType = in.getEvictionType(); if (evictType == IN.MAY_NOT_EVICT) { continue; } /* * This node is in the scanned node set. Select according to * the configured eviction policy. */ if (evictByLruOnly) { /* * Select the node with the lowest generation number, * irrespective of tree level or dirtyness. */ if (targetGeneration > in.getGeneration()) { targetGeneration = in.getGeneration(); target = in; } } else { /* * Select first by tree level, then by dirtyness, then by * generation/LRU. */ int level = normalizeLevel(in, evictType); if (targetLevel != level) { if (targetLevel > level) { targetLevel = level; targetDirty = in.getDirty(); targetGeneration = in.getGeneration(); target = in; } } else if (targetDirty != in.getDirty()) { if (targetDirty) { targetDirty = false; targetGeneration = in.getGeneration(); target = in; } } else { if (targetGeneration > in.getGeneration()) { targetGeneration = in.getGeneration(); target = in; } } } scanned++; } else { /* We wrapped around in the list. */ if (wrapped) { break; } else { nextNode = inList.first(); scanIter.reset(nextNode); wrapped = true; } } } if (target != null) { nNodesSelectedThisRun++; nNodesSelected++; } return target; } /** * Normalize the tree level of the given IN. * * Is public for unit testing. * * A BIN containing evictable LNs is given level 0, so it will be stripped * first. For non-duplicate and DBMAP trees, the high order bits are * cleared to make their levels correspond; that way, all bottom level * nodes (BINs and DBINs) are given the same eviction priority. * * Note that BINs in a duplicate tree are assigned the same level as BINs * in a non-duplicate tree. This isn't always optimimal, but is the best * we can do considering that BINs in duplicate trees may contain a mix of * LNs and DINs. */ public int normalizeLevel(IN in, int evictType) { int level = in.getLevel() & IN.LEVEL_MASK; if (level == 1 && evictType == IN.MAY_EVICT_LNS) { level = 0; } return level; } /** * Strip or evict this node. * @return number of bytes evicted. */ private long evict(INList inList, IN target, ScanIterator scanIter) throws DatabaseException { boolean envIsReadOnly = envImpl.isReadOnly(); long evictedBytes = 0; /* * Non-BIN INs are evicted by detaching them from their parent. For * BINS, the first step is to remove deleted entries by compressing * the BIN. The evictor indicates that we shouldn't fault in * non-resident children during compression. After compression, * LN stripping may be performed. * * If LN stripping is used, first we strip the BIN by merely detaching * all its resident LN targets. If we make progress doing that, we * stop and will not evict the BIN itself until possibly later. If it * has no resident LNs then we evict the BIN itself using the "regular" * detach-from-parent routine. * * If the cleaner is doing clustering, we don't do BIN stripping if we * can write out the BIN. Specifically LN stripping is not performed * if the BIN is dirty AND the BIN is evictable AND cleaner * clustering is enabled. In this case the BIN is going to be written * out soon, and with clustering we want to be sure to write out the * LNs with the BIN; therefore we don't do stripping */ /* * Use latchNoWait because if it's latched we don't want the cleaner * to hold up eviction while it migrates an entire BIN. Latched INs * have a high generation value, so not evicting makes sense. Pass * false because we don't want to change the generation during the * eviction process. */ if (target.latchNoWait(false)) { try { if (target instanceof BIN) { /* first attempt to compress deleted, resident children.*/ envImpl.lazyCompress(target); /* * Strip any resident LN targets right now. No need to * dirty the BIN, the targets are not persistent data. */ evictedBytes = ((BIN) target).evictLNs(); if (evictedBytes > 0) { nBINsStrippedThisRun++; nBINsStripped++; } } /* * If we were able to free any memory by LN stripping above, * then we postpone eviction of the BIN until a later pass. */ if (evictedBytes == 0 && target.isEvictable()) { /* Regular eviction. */ Tree tree = target.getDatabase().getTree(); /* getParentINForChildIN unlatches target. */ SearchResult result = tree.getParentINForChildIN (target, true, // requireExactMatch false); // updateGeneration if (result.exactParentFound) { evictedBytes = evictIN(target, result.parent, result.index, inList, scanIter, envIsReadOnly); } } } finally { target.releaseLatchIfOwner(); } } return evictedBytes; } /** * Evict an IN. Dirty nodes are logged before they're evicted. inlist is * latched with the major latch by the caller. */ private long evictIN(IN child, IN parent, int index, INList inlist, ScanIterator scanIter, boolean envIsReadOnly) throws DatabaseException { long evictBytes = 0; try { assert parent.isLatchOwner(); long oldGenerationCount = child.getGeneration(); /* * Get a new reference to the child, in case the reference * saved in the selection list became out of date because of * changes to that parent. */ IN renewedChild = (IN) parent.getTarget(index); /* * See the evict() method in this class for an explanation for * calling latchNoWait(false). */ if ((renewedChild != null) && (renewedChild.getGeneration() <= oldGenerationCount) && renewedChild.latchNoWait(false)) { try { if (renewedChild.isEvictable()) { /* * Log the child if dirty and env is not r/o. Remove * from IN list. */ long renewedChildLsn = DbLsn.NULL_LSN; boolean newChildLsn = false; if (renewedChild.getDirty()) { if (!envIsReadOnly) { /* * Determine whether provisional logging is * needed. The checkpointer can be null if it * was shutdown or never started. */ boolean logProvisional = (envImpl.getCheckpointer() != null && (renewedChild.getLevel() < envImpl. getCheckpointer(). getHighestFlushLevel())); /* * Log a full version (no deltas) and with * cleaner migration allowed. */ renewedChildLsn = renewedChild.log (logManager, false, // allowDeltas logProvisional, true, // proactiveMigration parent); newChildLsn = true; } } else { renewedChildLsn = parent.getLsn(index); } if (renewedChildLsn != DbLsn.NULL_LSN) { /* Take this off the inlist. */ scanIter.mark(); inlist.removeLatchAlreadyHeld(renewedChild); scanIter.resetToMark(); evictBytes = renewedChild.getInMemorySize(); if (newChildLsn) { /* * Update the parent so its reference is * null and it has the proper LSN. */ parent.updateEntry (index, null, renewedChildLsn); } else { /* * Null out the reference, but don't dirty * the node since only the reference * changed. */ parent.updateEntry(index, (Node) null); } /* Stats */ nNodesEvictedThisRun++; nNodesEvicted++; } } } finally { renewedChild.releaseLatch(); } } } finally { parent.releaseLatch(); } return evictBytes; } /** * Used by unit tests. */ IN getNextNode() { return nextNode; } /* For unit testing only. */ public void setRunnableHook(TestHook hook) { runnableHook = hook; } /* For debugging and unit tests. */ static public class EvictProfile { /* Keep a list of candidate nodes. */ private List candidates = new ArrayList(); /* Remember that this node was targetted. */ public boolean count(IN target) { candidates.add(new Long(target.getNodeId())); return true; } public List getCandidates() { return candidates; } public boolean clear() { candidates.clear(); return true; } } /* * ScanIterator keeps a handle onto the current round robin INList * iterator. It's deliberately not a member of the class in order to keep * less common state in the class. */ private static class ScanIterator { private INList inList; private Iterator iter; private IN nextMark; ScanIterator(IN startingIN, INList inList) throws DatabaseException { this.inList = inList; reset(startingIN); } void reset(IN startingIN) throws DatabaseException { iter = inList.tailSet(startingIN).iterator(); } IN mark() throws DatabaseException { if (iter.hasNext()) { nextMark = (IN) iter.next(); } else { nextMark = (IN) inList.first(); } return (IN) nextMark; } void resetToMark() throws DatabaseException { reset(nextMark); } boolean hasNext() { return iter.hasNext(); } IN next() { return (IN) iter.next(); } void remove() { iter.remove(); } } }